*** Reset the session: clear memory, close any log left open by an earlier
*** (possibly failed) run, drop user-defined programs, and erase stored snapshots
clear all
capture log close
program drop _all
set more off
snapshot erase _all

*** Open the log for this run.
*** The path uses forward slashes only: Stata accepts "/" as the directory
*** separator on Windows as well as on Mac/Unix, whereas "\" is Windows-specific.
log using "../Programs/Preparing Foreign Capital Stock Numbers.log", replace

*** Preparing Foreign Capital Stock Numbers.do
*** 7/28/2016
*** Brina Seidel


*************************************
*** Define a program to clean country names
*************************************
program define CLEAN_COUNTRY_NAMES
	*** Usage: CLEAN_COUNTRY_NAMES varname
	*** Trims the string variable given as the only argument and rewrites the country
	*** names matched by the rules below to one standard spelling.
	*** Every rule tests the ORIGINAL (trimmed) name and the rules run in order, so when
	*** several rules match the same name the last matching rule wins.
	*** Before the variable is overwritten, a table of every (new name, old name) pair
	*** that changed is listed.
	*** Uses preserve/restore internally, so it cannot be called while the caller's
	*** data are already preserved.

	*** Fail early, with a clear message, unless exactly one existing string variable is given
	syntax varname(string)
	local v `varlist'
	*** The working copy is named <varname>_temp; stop before touching the data if that name is taken
	confirm new variable `v'_temp

	replace `v' = trim(`v')
	gen `v'_temp = `v'
	quietly replace `v'_temp = "Yemen, Rep." if strpos(`v', "Yemen") > 0 
	quietly replace `v'_temp = "Vietnam" if strpos(`v', "Viet") > 0
	quietly replace `v'_temp = "Macao SAR, China" if strpos(`v', "Macao") > 0
	quietly replace `v'_temp = "Hong Kong SAR, China" if strpos(`v', "Hong") > 0
	quietly replace `v'_temp = "China" if strpos(`v', "China") > 0 & strpos(`v', "Mainland") > 0
	quietly replace `v'_temp = "Lao PDR" if strpos(`v', "Lao") > 0
	*** Korea: names containing "Dem" or "North" go to the DPRK spelling; every other
	*** "Korea" name (or any containing "South") goes to "Korea, Rep."
	quietly replace `v'_temp = "Korea, Dem. People’s Rep." if strpos(`v', "Korea") > 0 & (strpos(`v', "Dem") > 0 | strpos(`v', "North") > 0)
	quietly replace `v'_temp = "Korea, Rep." if strpos(`v', "Korea") > 0 & ((strpos(`v', "Dem") == 0  & strpos(`v', "North") == 0) | strpos(`v', "South") > 0)
	*** Re-applies the Hong Kong spelling in case one of the rules since the first Hong Kong rule also matched
	quietly replace `v'_temp = "Hong Kong SAR, China" if strpos(`v', "Hong Kong") > 0
	quietly replace `v'_temp = "Gambia, The" if strpos(`v', "Gambia") > 0
	*** Congo: "Dem" in the name separates the two Congos; "Zaire" is also mapped to the Dem. Rep.
	quietly replace `v'_temp = "Congo, Rep." if strpos(`v', "Congo") > 0 & strpos(`v', "Dem") == 0
	quietly replace `v'_temp = "Congo, Dem. Rep." if strpos(`v', "Congo") > 0 & strpos(`v', "Dem") > 0
	quietly replace `v'_temp = "Congo, Dem. Rep." if strpos(`v', "Zaire") > 0
	quietly replace `v'_temp = "Bahamas, The" if strpos(`v', "Bahamas") > 0 
	quietly replace `v'_temp = "Egypt, Arab Rep." if strpos(`v', "Egypt") > 0 
	quietly replace `v'_temp = "Cote d'Ivoire" if strpos(`v', "Ivoire") > 0 
	quietly replace `v'_temp = "Bolivia" if strpos(`v', "Bolivia") > 0 
	quietly replace `v'_temp = "Tanzania" if strpos(`v', "Tanzania") > 0
	quietly replace `v'_temp = "Afghanistan" if strpos(`v', "Afghanistan") > 0
	quietly replace `v'_temp = "Sao Tome and Principe" if strpos(`v', "ncipe") > 0
	*** Matches every name containing "Iran" (including "Iran, Islamic Republic of")
	quietly replace `v'_temp = "Iran, Islamic Rep." if strpos(`v', "Iran") > 0
	quietly replace `v'_temp = "St. Vincent and the Grenadines" if strpos(`v', "St. Vincent")> 0 | strpos(`v', "Grenadines")> 0
	quietly replace `v'_temp = "Venezuela, RB" if strpos(`v', "Venezuela") > 0
	quietly replace `v'_temp = "Central African Republic" if strpos(`v', "Central African") > 0 
	quietly replace `v'_temp = "Kyrgyz Republic" if strpos(`v', "Kyrgyz") > 0 
	quietly replace `v'_temp = "Macedonia, FYR" if strpos(`v', "Macedonia") > 0 
	*** NOTE(review): this rule looks for the literal text "Falkand" (sic) and maps it to
	*** "Maldives". It only fires if the input carries that exact spelling -- confirm
	*** whether the pattern and/or the target name are what was intended.
	quietly replace `v'_temp = "Maldives" if strpos(`v', "Falkand") > 0 
	quietly replace `v'_temp = "Antigua and Barbuda" if strpos(`v', "Antigua") > 0 
	quietly replace `v'_temp = "Cabo Verde" if strpos(`v', "Verde") > 0 
	quietly replace `v'_temp = "Comoros" if strpos(`v', "Comoro") > 0
	quietly replace `v'_temp = "Guinea-Bissau" if strpos(`v', "Guinea") > 0 & strpos(`v', "Bissau") > 0
	quietly replace `v'_temp = "Haiti" if `v' == "Haïti"
	quietly replace `v'_temp = "Myanmar" if strpos(`v', "Burma") > 0
	quietly replace `v'_temp = "Syrian Arab Republic" if strpos(`v', "Syria") > 0
	quietly replace `v'_temp = "Brunei Darussalam" if strpos(`v', "Brunei") > 0 
	quietly replace `v'_temp = "St. Kitts and Nevis" if strpos(`v', "Kitts") >  0 & strpos(`v', "Nevis") > 0
	quietly replace `v'_temp = "Russian Federation" if strpos(`v', "Russia") > 0 
	quietly replace `v'_temp = "Micronesia, Fed. Sts." if strpos(`v', "Micronesia") > 0 
	quietly replace `v'_temp = "Timor-Leste" if strpos(`v', "Timor") > 0 
	quietly replace `v'_temp = "United States" if `v' == "United States of America"
	
	*** Show country names that we updated (one row per distinct new/old pair)
	preserve
	keep if `v'_temp != `v'
	contract `v'_temp `v'
	list `v'_temp `v', ab(20) sep(100)
	restore

	*** Overwrite the original variable with the cleaned names and remove the working copy
	replace `v' = `v'_temp
	drop `v'_temp
end

*************************************************************
*************************************************************
*** Foreign Capital Stock Totals (% Developing Country GDP) 1870-1998 (Maddison)
*************************************************************
*************************************************************

*************************************
*** Read in the data
*************************************
*** Import the Maddison sheet; the first row of the sheet supplies the variable names.
*** (Forward slashes in the path work on Windows as well as on Mac/Unix.)
import excel using "../Input Data/Globalization Input Data.xlsx", clear sheet("Maddison Foreign Cap Stock") firstrow

*** Check the values
*twoway line foreign year
tab year, m

*** Save tempfile
*** Note: ".dta" is appended to the temporary name, so the file on disk is
*** <tempname>.dta; the later append refers to it with the same suffix.
tempfile foreign_cap1870_1998
save `foreign_cap1870_1998'.dta, replace

*************************************************************
*************************************************************
*** FDI Inward Stock 1990-2015 (UNCTAD)
*************************************************************
*************************************************************

*************************************
*** Read in the data
*************************************
*** Import the UNCTAD sheet; the first row of the sheet supplies the variable names.
*** (Forward slashes in the path work on Windows as well as on Mac/Unix.)
import excel "../Input Data/Globalization Input Data.xlsx", clear sheet("UNCTAD FDI Inward Stock") firstrow

*** Clean country names to match WB
CLEAN_COUNTRY_NAMES countryname

*************************************
*** Keep only countries in Maddison's developing world, and merge in GDP
*************************************

*** Prepare GDP for countries in Maddison's developing world to merge in.
*** The FDI data stay in memory behind preserve/restore while the GDP file is built.
preserve
wbopendata, clear indicator(NY.GDP.MKTP.PP.CD) nometadata
*** Keep only necessary countries: the five listed regions, minus Japan, Australia and New Zealand
keep if inlist(region, "Middle East & North Africa (all income levels)", "Latin America & Caribbean (all income levels)", "East Asia & Pacific (all income levels)", "South Asia", "Sub-Saharan Africa (all income levels)") ///
	& !inlist(countryname, "Japan", "Australia", "New Zealand")
*** Reshape the data from one yr#### column per year to one row per country-year
rename yr* gdp*
reshape long gdp, i(countryname) j(year)
*** Keep only necessary years
keep if year >= 1990
keep countryname year gdp
tempfile gdp
save `gdp'.dta, replace
restore

*** Merge in GDP, keeping only the countries in Maddison's developing world:
*** keep(2 3) retains every country-year in the GDP file (matched or not) and
*** drops FDI rows with no counterpart in it
merge 1:1 countryname year using `gdp'.dta, keep(2 3) nogen

*************************************
*** Clean missing data
*** For cases where the nonmissing data points are > 5 years apart, we will drop the country from the sample
*************************************

*** Mark countries that are missing data because the countries did not exist yet -- these are not truly "missing"
*** ".a" will refer to values that are truly missing, while ".b" will refer to values that are missing because the country did not exist
*** Load each country's start year (sheet "COW Year of Country Creation") into a tempfile.
*** (Forward slashes in the path work on Windows as well as on Mac/Unix.)
preserve
import excel using "../Input Data/Globalization Input Data.xlsx", clear sheet("COW Year of Country Creation") firstrow
keep countryname start_year
tempfile start_year
save `start_year'.dta, replace
restore
*** keep(1 3): countries without a start_year entry stay in the data with start_year missing
merge m:1 countryname using `start_year'.dta, keep(1 3) nogen norep
*** .a = truly missing; .b = year is before the country's start year
replace fdi_stock = .a if fdi_stock == .
replace fdi_stock = .b if year < start_year & start_year < .

*** Drop countries for which the nonmissing data points are > 5 years apart
sort countryname year
*** For every row, years_to_nonmiss is the number of rows ahead (within the same country,
*** looking at most 55 rows ahead) of the next value that is not .a; it stays 0 when no
*** such row exists, so a run of .a at the END of a country's series is not measured
*** here (the assert below would catch it). Values coded .b count as "not missing".
gen years_to_nonmiss = 0
forvalues i = 1/55 {
	quietly replace years_to_nonmiss = `i' if years_to_nonmiss == 0 &  fdi_stock[_n+`i'] != .a & countryname[_n+`i'] == countryname
}
bys countryname: egen biggest_gap = max(years_to_nonmiss)
*** Also mark countries with no data at all to drop
*** (a missing biggest_gap satisfies "biggest_gap > 5" below, so these are dropped too)
gen any_data_temp = (fdi_stock < .)
bys countryname: egen any_data = max(any_data_temp)
replace biggest_gap = . if any_data == 0
*** Show the countries we are dropping
tab countryname if biggest_gap > 5
drop if biggest_gap > 5

*** Confirm there is no missing data that we need to impute
assert fdi_stock != .a

*** Exclude the GDP for countries that are missing FDI data because they had not been created yet
replace gdp = . if fdi_stock == .b

*************************************
*** Compare developing world GDP to the GDP of countries in our sample in 2014
*************************************
*** Build a 2014 GDP file (indicator NY.GDP.MKTP.CD) for the same regions,
*** leaving the working data untouched behind preserve/restore
preserve
wbopendata, clear indicator(NY.GDP.MKTP.CD) year(2014) nometadata
keep if inlist(region, ///
	"Middle East & North Africa (all income levels)", ///
	"Latin America & Caribbean (all income levels)", ///
	"East Asia & Pacific (all income levels)", ///
	"South Asia", ///
	"Sub-Saharan Africa (all income levels)")
drop if inlist(countryname, "Japan", "Australia", "New Zealand")
rename yr* gdp_current*
reshape long gdp_current, i(countryname) j(year)
tempfile gdp_current
save `gdp_current'.dta, replace
restore

*** Report the share of 2014 GDP that belongs to countries outside our FDI sample
preserve
keep if year == 2014 & !missing(fdi_stock)
merge 1:1 countryname year using `gdp_current'.dta
*** Only matched rows (country in our sample AND in the GDP file) count toward the sample total
generate gdp_current_sample = gdp_current if _merge == 3
collapse (sum) gdp*, by(year)
format gdp* %16.0fc
generate pct_excluded = (gdp_current - gdp_current_sample)/gdp_current * 100
list year gdp_current gdp_current_sample pct_excluded
restore

*************************************
*** Developing-world totals by year
*** (collapse (sum) counts missing values, including .a/.b, as zero)
*************************************
collapse (sum) fdi_stock gdp, by(year)

*************************************
*** FDI stock expressed as a percent of GDP
*************************************
generate fdi_stock_pct = fdi_stock/gdp * 100

*** Keep the yearly ratio only and store it for the final combination step
keep year fdi_stock_pct
tempfile fdi_stock1990_2015
save `fdi_stock1990_2015'.dta, replace

*************************************************************
*************************************************************
*** External Debt 1990-2014 (WDI) 
*************************************************************
*************************************************************

*************************************
*** Read in the data
*************************************
*** Download both indicators at once; the result has one row per country-indicator
*** and one yr#### column per year
wbopendata, clear indicator(DT.DOD.DECT.CD; NY.GDP.MKTP.PP.CD) nometadata

*** Keep only necessary countries: the five listed regions, minus Japan, Australia and New Zealand
keep if inlist(region, ///
	"Middle East & North Africa (all income levels)", ///
	"Latin America & Caribbean (all income levels)", ///
	"East Asia & Pacific (all income levels)", ///
	"South Asia", ///
	"Sub-Saharan Africa (all income levels)")
drop if inlist(countryname, "Japan", "Australia", "New Zealand")

*** Reshape the data
*** encode numbers the indicator codes in alphabetical order, so
*** 1 = DT.DOD.DECT.CD and 2 = NY.GDP.MKTP.PP.CD (label list prints the mapping)
encode indicatorcode, generate(indicator_num)
label list indicator_num
drop indicatorname indicatorcode
*** Going wide appends the indicator number to each year column (yr####1 / yr####2)...
reshape wide yr*, i(countryname) j(indicator_num)
*** ...which is then turned into a stub per indicator
rename yr*1 external_debt*
rename yr*2 gdp*
reshape long external_debt gdp, i(countryname) j(year)

*** Keep only necessary years
*** (the assert documents that no external debt figure is expected for 2015)
assert external_debt == . if year == 2015
keep if inrange(year, 1990, 2014)

*************************************
*** Clean missing data:
*** 1. For cases where the nonmissing data points are > 5 years apart, we will drop the country from the sample
*** 2. For cases where the missing data points are at the beginning of the series, we will extend the earliest ratio of external debt to GDP backwards
*** Note: There are no cases with gaps of <= 5 years; if there were, we would impute missing values. 
*************************************

*** 1. For cases where the nonmissing data points are > 5 years apart, we will drop the country from the sample

*** Mark countries that are missing data because the countries did not exist yet -- these are not truly "missing"
*** ".a" will refer to values that are truly missing, while ".b" will refer to values that are missing because the country did not exist
*** Load each country's start year (sheet "COW Year of Country Creation") into a tempfile.
*** (Forward slashes in the path work on Windows as well as on Mac/Unix.)
preserve
import excel using "../Input Data/Globalization Input Data.xlsx", clear sheet("COW Year of Country Creation") firstrow
keep countryname start_year
tempfile start_year
save `start_year'.dta, replace
restore
*** keep(1 3): countries without a start_year entry stay in the data with start_year missing
merge m:1 countryname using `start_year'.dta, keep(1 3) nogen norep
*** .a = truly missing; .b = year is before the country's start year
replace external_debt = .a if external_debt == .
replace external_debt = .b if year < start_year & start_year < .

*** Drop countries for which the nonmissing data points are > 5 years apart
sort countryname year
*** For every row, years_to_nonmiss is the number of rows ahead (within the same country,
*** looking at most 55 rows ahead) of the next value that is not .a; it stays 0 when no
*** such row exists. Values coded .b count as "not missing".
gen years_to_nonmiss = 0
forvalues i = 1/55 {
	quietly replace years_to_nonmiss = `i' if years_to_nonmiss == 0 &  external_debt[_n+`i'] != .a & countryname[_n+`i'] == countryname
}
bys countryname: egen biggest_gap = max(years_to_nonmiss)
*** Also mark countries with no data at all to drop
*** (a missing biggest_gap satisfies "biggest_gap > 5" below, so these are dropped too)
gen any_data_temp = (external_debt < .)
bys countryname: egen any_data = max(any_data_temp)
replace biggest_gap = . if any_data == 0
*** Show the countries we are dropping
tab countryname if biggest_gap > 5
*** Variable names are written out in full so the listing does not depend on
*** variable abbreviation being switched on
preserve
keep if biggest_gap > 5
contract countryname
list countryname, ab(20) sep(100)
restore
drop if biggest_gap > 5

*** 2. For cases where the missing data points are at the beginning of the series, we will extend the earliest ratio of external debt to GDP backwards

*** First year with data, per country
generate has_data = year if !missing(external_debt)
bysort countryname: egen first_data = min(has_data)
drop has_data

*** List the countries whose series starts after 1990 (these get extrapolated)
preserve
keep if first_data > 1990 & !missing(first_data)
contract countryname first_data
list countryname first_data, abbreviate(20) separator(100)
restore

*** Earliest observed debt-to-GDP ratio, spread over every row of those countries
generate temp_first_ratio = external_debt/gdp if year == first_data & first_data > 1990 & !missing(first_data)
bysort countryname: egen first_ratio = max(temp_first_ratio)
drop temp_first_ratio

*** Back-fill: debt = GDP x earliest ratio for the years before the first data point.
*** The assert checks that, from the start year onward, everything before the first
*** data point is coded .a (it is skipped for countries whose start_year is missing,
*** because "year >= start_year" is then false).
assert external_debt == .a  if year < first_data & year >= start_year
replace external_debt = gdp * first_ratio if year < first_data & external_debt == .a
drop first_data first_ratio

*** Country-years still without external debt must not contribute GDP to the denominator
replace gdp = . if missing(external_debt)

*************************************
*** Compare developing world GDP to the GDP of countries in our sample in 2014
*************************************
*** Build a 2014 GDP file (indicator NY.GDP.MKTP.CD) for the same regions,
*** leaving the working data untouched behind preserve/restore
preserve
wbopendata, clear indicator(NY.GDP.MKTP.CD) year(2014) nometadata
keep if inlist(region, ///
	"Middle East & North Africa (all income levels)", ///
	"Latin America & Caribbean (all income levels)", ///
	"East Asia & Pacific (all income levels)", ///
	"South Asia", ///
	"Sub-Saharan Africa (all income levels)")
drop if inlist(countryname, "Japan", "Australia", "New Zealand")
rename yr* gdp_current*
reshape long gdp_current, i(countryname) j(year)
tempfile gdp_current
save `gdp_current'.dta, replace
restore

*** Report the share of 2014 GDP that belongs to countries outside our external debt sample
preserve
keep if year == 2014 & !missing(external_debt)
merge 1:1 countryname year using `gdp_current'.dta
*** Only matched rows (country in our sample AND in the GDP file) count toward the sample total
generate gdp_current_sample = gdp_current if _merge == 3
collapse (sum) gdp*, by(year)
format gdp* %16.0fc
generate pct_excluded = (gdp_current - gdp_current_sample)/gdp_current * 100
list year gdp_current gdp_current_sample pct_excluded
restore

*************************************
*** Developing-world totals by year
*** (collapse (sum) counts missing values as zero)
*************************************
collapse (sum) external_debt gdp, by(year)

*************************************
*** External debt expressed as a percent of developing world GDP
*************************************
generate external_debt_pct = external_debt/gdp*100

*** Keep the yearly ratio only and store it for the final combination step
keep year external_debt_pct
tempfile external_debt1990_2014
save `external_debt1990_2014'.dta, replace

*************************************************************
*************************************************************
*** Portfolio equity stock, 1990-2011 - External Wealth of Nations
*************************************************************
*************************************************************

*************************************
*** Read in the data
*************************************
set more off
*** Import the ENW sheet; the first row of the sheet supplies the variable names.
*** (Forward slashes in the path work on Windows as well as on Mac/Unix.)
import excel using "../Input Data/Globalization Input Data.xlsx", clear sheet("ENW Portfolio Equity Stock") firstrow
desc, f

*** Keep only necessary years
sum year
keep if year >= 1990

*** Clean country name 
rename country_name countryname
CLEAN_COUNTRY_NAMES countryname

*************************************
*** Keep only countries in Maddison's developing world, and merge in GDP
*************************************

*** Prepare GDP for countries in Maddison's developing world to merge in.
*** The ENW data stay in memory behind preserve/restore while the GDP file is built.
preserve
wbopendata, clear indicator(NY.GDP.MKTP.PP.CD) nometadata
*** Keep only necessary countries: the five listed regions, minus Japan, Australia and New Zealand
keep if inlist(region, "Middle East & North Africa (all income levels)", "Latin America & Caribbean (all income levels)", "East Asia & Pacific (all income levels)", "South Asia", "Sub-Saharan Africa (all income levels)") ///
	& !inlist(countryname, "Japan", "Australia", "New Zealand")
*** Reshape the data from one yr#### column per year to one row per country-year
rename yr* gdp*
reshape long gdp, i(countryname) j(year)
*** Keep only necessary years
keep if year >= 1990 & year <= 2011
keep countryname year gdp
tempfile gdp
save `gdp'.dta, replace
restore

*** Merge in GDP, keeping only countries in Maddison's developing world:
*** keep(2 3) retains every country-year in the GDP file (matched or not) and
*** drops ENW rows with no counterpart in it
drop if wbcode == "ZZ"
merge 1:1 countryname year using `gdp'.dta, keep(2 3) nogen

*************************************
*** Clean missing data:
*** 1. For cases where the nonmissing data points are > 5 years apart, we will drop the country from the sample
*** 2. For cases where the missing data points are at the beginning of the series, we will extend the earliest ratio of portfolio equity stock to GDP backwards
*** Note: There are no cases with gaps of <= 5 years; if there were, we would impute missing values. 
*************************************

*** 1. For cases where the nonmissing data points are > 5 years apart, we will drop the country from the sample

*** Mark countries that are missing data because the countries did not exist yet -- these are not truly "missing"
*** ".a" will refer to values that are truly missing, while ".b" will refer to values that are missing because the country did not exist
*** Load each country's start year (sheet "COW Year of Country Creation") into a tempfile.
*** (Forward slashes in the path work on Windows as well as on Mac/Unix.)
preserve
import excel using "../Input Data/Globalization Input Data.xlsx", clear sheet("COW Year of Country Creation") firstrow
keep countryname start_year
tempfile start_year
save `start_year'.dta, replace
restore
*** keep(1 3): countries without a start_year entry stay in the data with start_year missing
merge m:1 countryname using `start_year'.dta, keep(1 3) nogen norep
*** .a = truly missing; .b = year is before the country's start year
replace port_eq_liabilities = .a if port_eq_liabilities == .
replace port_eq_liabilities = .b if year < start_year & start_year < .

*** Drop countries for which the nonmissing data points are > 5 years apart
sort countryname year
*** For every row, years_to_nonmiss is the number of rows ahead (within the same country,
*** looking at most 55 rows ahead) of the next value that is not .a; it stays 0 when no
*** such row exists. Values coded .b count as "not missing".
gen years_to_nonmiss = 0
forvalues i = 1/55 {
	quietly replace years_to_nonmiss = `i' if years_to_nonmiss == 0 &  port_eq_liabilities[_n+`i'] != .a & countryname[_n+`i'] == countryname
}
bys countryname: egen biggest_gap = max(years_to_nonmiss)
*** Also mark countries with no data at all to drop
*** (a missing biggest_gap satisfies "biggest_gap > 5" below, so these are dropped too)
gen any_data_temp = (port_eq_liabilities < .)
bys countryname: egen any_data = max(any_data_temp)
replace biggest_gap = . if any_data == 0
*** Show the countries we are dropping
tab countryname if biggest_gap > 5
*** Variable names are written out in full so the listing does not depend on
*** variable abbreviation being switched on
preserve
keep if biggest_gap > 5
contract countryname
list countryname, ab(20) sep(100)
restore
drop if biggest_gap > 5

*** 2. For cases where the missing data points are at the beginning of the series, we will extend the earliest ratio of portfolio equity stock to GDP backwards

*** First year with data, per country
generate has_data = year if !missing(port_eq_liabilities)
bysort countryname: egen first_data = min(has_data)
drop has_data

*** List the countries whose series starts after 1990 (these get extrapolated)
preserve
keep if first_data > 1990 & !missing(first_data)
contract countryname first_data
list countryname first_data, abbreviate(20) separator(100)
restore

*** Earliest observed stock-to-GDP ratio, spread over every row of those countries
generate temp_first_ratio = port_eq_liabilities/gdp if year == first_data & first_data > 1990 & !missing(first_data)
bysort countryname: egen first_ratio = max(temp_first_ratio)
drop temp_first_ratio

*** Back-fill: stock = GDP x earliest ratio for the years before the first data point.
*** The assert checks that, from the (known) start year onward, everything before the
*** first data point is coded .a.
assert port_eq_liabilities == .a  if year < first_data & year >= start_year & !missing(start_year)
replace port_eq_liabilities = gdp * first_ratio if year < first_data & port_eq_liabilities == .a 
drop first_data first_ratio

*** Wherever GDP is available, nothing may still be coded "truly missing"
assert port_eq_liabilities != .a if !missing(gdp)

*************************************
*** Country-years without portfolio stock data must not contribute GDP to the denominator
*************************************
replace gdp = . if missing(port_eq_liabilities)

*************************************
*** Save the 2000 through 2011 data, which we will use later with the IMF BOPS to impute flows for the missing years
*************************************
*** Country-level stocks for 2000-2011, stored for the IMF BOPS step further down;
*** isid confirms one row per country-year
preserve
keep if inrange(year, 2000, 2011) & countryname != ""
keep wbcode countryname year port_eq_liabilities
isid countryname year
sort countryname year
tempfile port_stock00_11
save `port_stock00_11'.dta, replace
restore

*************************************
*** Developing-world totals by year
*** (collapse (sum) counts missing values, including .a/.b, as zero)
*************************************
collapse (sum) port_eq_liabilities gdp, by(year)

*************************************
*** Portfolio stock expressed as a percent of developing world GDP
*************************************
generate port_eq_liabilities_pct = port_eq_liabilities/gdp*100

*** Store the yearly totals and ratio for the final combination step
tempfile portfolio_stock1990_2011
save `portfolio_stock1990_2011'.dta, replace

*************************************************************
*************************************************************
*** Portfolio equity stock, 2012-2015 - IMF BOPS
*** (Add IMF BOPS flows from 2012-2015 to the 2011 stocks from ENW countries to get total stocks for 2012-2015)
*************************************************************
*************************************************************

*************************************
*** Read in the data
*************************************
*** Import the IMF BOPS sheet; the first row of the sheet supplies the variable names.
*** (Forward slashes in the path work on Windows as well as on Mac/Unix.)
import excel using "../Input Data/Globalization Input Data.xlsx", clear sheet("IMF BOPS Portfolio Equity Flows") firstrow
desc, f

*** Keep only necessary years
keep if year >= 2001

*** Clean country names
rename country_name countryname
CLEAN_COUNTRY_NAMES countryname

*** Keep only the countries for which we have ENW portfolio equity stock data
*** (the list of countries is taken from the 2000-2011 stock file)
preserve
use `port_stock00_11'.dta, clear
keep countryname
duplicates drop
tempfile to_keep
save `to_keep'.dta, replace
restore
merge m:1 countryname using `to_keep'.dta, keep(3) nogen noreport

*************************************
*** Append the ENW data (this will also bring in countries that had ENW portfolio equity stock but are not in the IMF data)
*************************************
merge 1:1 countryname year using `port_stock00_11'.dta, nogen
codebook countryname
*** The ENW stock file stops in 2011, so no stock value may exist after that year
assert port_eq_liabilities == . if year > 2011

*** Make the panel rectangular: tsfill, full adds a row for every country-year
*** combination that is absent (e.g. the later years for countries that were not in
*** the IMF data). tsset needs a numeric panel id, hence the encode/decode round trip.
encode countryname, gen(country_num)
drop countryname
tsset country_num year 
tsfill, full
decode country_num, gen(countryname)
drop country_num 

*************************************
*** Merge in GDP so that we can estimate portfolio equity stock data for countries that are missing portfolio equity flow data
*** by assuming that the ratio of portfolio equity stocks to GDP stayed constant in 2012-2015
*************************************

*** Build the country-year GDP file (2000 onward) behind preserve/restore
preserve
wbopendata, clear indicator(NY.GDP.MKTP.PP.CD) nometadata
*** Keep only necessary countries: the five listed regions, minus Japan, Australia and New Zealand
keep if inlist(region, ///
	"Middle East & North Africa (all income levels)", ///
	"Latin America & Caribbean (all income levels)", ///
	"East Asia & Pacific (all income levels)", ///
	"South Asia", ///
	"Sub-Saharan Africa (all income levels)")
drop if inlist(countryname, "Japan", "Australia", "New Zealand")
*** One row per country-year instead of one yr#### column per year
rename yr* gdp*
reshape long gdp, i(countryname) j(year)
*** Keep only necessary years
keep if year >= 2000
keep countryname year gdp
tempfile gdp
save `gdp'.dta, replace
restore

*** assert(2 3): every row in memory must find a GDP row; keep(3): GDP rows with no
*** counterpart in memory are discarded
merge 1:1 countryname year using `gdp'.dta, assert(2 3) keep(3) nogenerate

*** Store each country's stock-to-GDP ratio in the first year (2000), repeated on
*** every country-year row, as the starting point for the year-by-year calculation
preserve
generate stock_pct2000_temp = port_eq_liabilities/gdp if year == 2000
bysort countryname: egen stock_pct2000 = max(stock_pct2000_temp)
keep countryname year stock_pct2000
tempfile stock_pct2000
save `stock_pct2000'.dta, replace
restore

*************************************
*** Calculate portfolio equity stock for 2001-2011 by adding flows to stocks from the previous year
*** (We have actual portfolio equity stock values for 2001-2011 from ENW, 
*** so by checking the difference between our calculated value and the actual value, we can estimate how much of the stock falls off each year)
*************************************

*** For each year 2001-2011: calculated stock = last year's ACTUAL stock + this year's flow
*** (or GDP x last year's calculated stock/GDP ratio when that sum cannot be formed).
*** Each pass works on a preserved copy and leaves two tempfiles behind:
***   calc_<year>_stock : calculated stock, imputed flow and GDP
***   stock_pct<year>   : calculated stock as a share of GDP (used by the next pass)
forvalues i = 2001/2011 {

	preserve
	local last_year = `i' - 1
	*** Keep ONLY last year's actual stock. Blanking every other year matters: the
	*** egen max() below would otherwise return the largest stock of any year up to
	*** last year (the historical peak) instead of last year's value whenever the
	*** stock had fallen.
	quietly replace port_eq_liabilities = . if year != `last_year'
	
	*** Merge in portfolio equity stocks as a % of GDP in the previous year
	merge 1:1 countryname year using `stock_pct`last_year''.dta, assert(3) nogen norep
	
	*** Calculate portfolio equity stocks by adding this year's flows to last year's stocks
	*** (max() over the single remaining value copies it to every row of the country)
	quietly bys countryname: egen port_eq_liabilities`last_year' = max(port_eq_liabilities)
	quietly gen temp_port_eq_stocks = port_eq_liabilities`last_year' +  port_eq_flow if year == `i'
	
	*** For cases that are missing portfolio equity flow data, calculate portfolio equity stocks by assuming that stocks as a % of GDP is the same as last year
	*** (the assert stops the run if a flow exists but last year's stock does not)
	assert port_eq_flow == . if temp_port_eq_stocks == . &  year == `i'
	quietly replace temp_port_eq_stocks = gdp * stock_pct`last_year' if temp_port_eq_stocks == . &  year == `i' 
		
	*** Assign year `i' total to the entire country
	assert temp_port_eq_stocks == . if year != `i'
	assert temp_port_eq_stocks < . if year == `i' & stock_pct`last_year' < .
	quietly bys countryname: egen port_eq_liabilities`i' = max(temp_port_eq_stocks)
	order port_eq_liabilities`i', after(port_eq_liabilities`last_year')
	
	*** Get our calculated value for flows in year `i'
	gen imputed_flows`i' = port_eq_liabilities`i' - port_eq_liabilities`last_year'

	*** Save a tempfile 
	rename port_eq_liabilities`i' port_eq_liabilities`i'_calc
	label variable port_eq_liabilities`i'_calc "Calculated Port Eq Liabilities in `i' (for comparison)"
	keep countryname year port_eq_liabilities`i'_calc imputed_flows`i' gdp
	tempfile calc_`i'_stock 
	save `calc_`i'_stock'.dta, replace
	
	*** Save a tempfile with just stocks as a percent of GDP for this year
	quietly gen stock_pct`i'_temp = port_eq_liabilities`i'_calc/gdp if year == `i'
	quietly bys countryname: egen stock_pct`i' = max(stock_pct`i'_temp)
	keep countryname year stock_pct`i'
	tempfile stock_pct`i'
	quietly save `stock_pct`i''.dta, replace
	
	restore
}


*** Get the difference between calculated and real values
forvalues yr = 2001/2011 {

	*** Attach the calculated stock and imputed flow for this year
	merge 1:1 countryname year using `calc_`yr'_stock'.dta, assert(3) nogenerate noreport 
	
	*** Copy the actual stock of this year to every row of the country, then express the
	*** gap between actual and calculated stock relative to (calculated stock - imputed flow)
	quietly generate port_eq_liabilities`yr'_2 = port_eq_liabilities if year == `yr'
	quietly bysort countryname: egen port_eq_liabilities`yr' = max(port_eq_liabilities`yr'_2)
	drop port_eq_liabilities`yr'_2
	quietly generate diff_pct_`yr' = ((port_eq_liabilities`yr'-imputed_flows`yr') - (port_eq_liabilities`yr'_calc-imputed_flows`yr'))/(port_eq_liabilities`yr'_calc-imputed_flows`yr')

	*** Average the gap across countries, weighting each country by its share of the total stock
	preserve
	*** one row per distinct (country, gap, stock) combination
	contract countryname diff_pct_`yr' port_eq_liabilities`yr'
	quietly egen port_eq_liabilities`yr'_tot = total(port_eq_liabilities`yr')
	quietly generate port_eq_liabilities`yr'_pct = port_eq_liabilities`yr'/port_eq_liabilities`yr'_tot
	*** scaled by _N so that the plain mean taken below acts as a share-weighted mean
	quietly generate diff_pct_`yr'_weighted = diff_pct_`yr' * port_eq_liabilities`yr'_pct * _N
	*** Trim the upper tail: rows more than 2 sd above the mean go (rows with a missing
	*** weighted gap also satisfy ">" and are removed)
	quietly summarize diff_pct_`yr'_weighted, detail
	drop if diff_pct_`yr'_weighted > r(mean) + 2*r(sd)
	quietly summarize diff_pct_`yr'_weighted, detail
	*** Remember this year's mean gap
	local loss_of_stock`yr' = r(mean)
	restore
}

*** Simple average of the eleven yearly means (2001 through 2011)
local loss_of_stock = 0
local n_years = 0
forvalues yr = 2001/2011 {
	local loss_of_stock = `loss_of_stock' + `loss_of_stock`yr''
	local n_years = `n_years' + 1
}
local loss_of_stock = `loss_of_stock'/`n_years'
display "Depreciation rate (2001-2011 average) = `loss_of_stock'"

*** From here on only 2011 onward is needed, together with the actual 2011 stock
*** (port_eq_liabilities2011); the comparison variables are discarded
drop if year < 2011
drop port_eq_liabilities*_calc port_eq_liabilities2001-port_eq_liabilities2010 diff_pct_* imputed_flows*


*************************************
*** Estimate 2012-2015 portfolio equity stocks by adding flows to stocks from the previous year
*************************************
forvalues yr = 2012/2015 {
	
	local prev = `yr' - 1
	
	*** Previous year's stock as a share of GDP, copied to every row of the country
	generate stock_pct`prev'_temp = port_eq_liabilities`prev'/gdp if year == `prev'
	bysort countryname: egen stock_pct`prev' = max(stock_pct`prev'_temp)
	
	*** This year's stock = previous stock adjusted by the average loss-of-stock rate, plus this year's flow
	generate temp_port_eq_stocks = port_eq_liabilities`prev'*(1+`loss_of_stock') +  port_eq_flow if year == `yr'
	
	*** Fallback when that sum cannot be formed: hold the stock-to-GDP ratio at the previous year's level
	assert port_eq_flow == . if temp_port_eq_stocks == . &  year == `yr' 
	replace temp_port_eq_stocks = gdp * stock_pct`prev' if temp_port_eq_stocks == . &  year == `yr' 
		
	*** Sanity checks, then copy this year's value to every row of the country
	assert temp_port_eq_stocks == . if year != `yr'
	assert temp_port_eq_stocks < . if year == `yr' & stock_pct`prev' < . & gdp < .
	bysort countryname: egen port_eq_liabilities`yr' = max(temp_port_eq_stocks)
	order port_eq_liabilities`yr', after(port_eq_liabilities`prev')
	*** stock_pct* removes both the ratio and its _temp helper
	drop temp_port_eq_stocks stock_pct*

}

*************************************
*** Reshape the data
*************************************
*** The yearly stocks sit in country-constant columns (port_eq_liabilities2011 ... 2015)
*** while GDP is one row per year. Going wide on GDP leaves one row per country;
*** going long on both stubs then yields one row per country-year.
keep countryname year gdp port_eq_liabilities20*
reshape wide gdp, i(countryname) j(year)
duplicates drop
isid countryname
reshape long port_eq_liabilities gdp, i(countryname) j(year)
*** 2011 was only carried along as the base year
drop if year == 2011

*************************************
*** Compare developing world GDP to the GDP of countries in our sample in 2014
*************************************
*** Build a 2014 GDP file (indicator NY.GDP.MKTP.CD) for the same regions
preserve
wbopendata, clear indicator(NY.GDP.MKTP.CD) year(2014) nometadata
keep if inlist(region, ///
	"Middle East & North Africa (all income levels)", ///
	"Latin America & Caribbean (all income levels)", ///
	"East Asia & Pacific (all income levels)", ///
	"South Asia", ///
	"Sub-Saharan Africa (all income levels)")
drop if inlist(countryname, "Japan", "Australia", "New Zealand")
rename yr* gdp_current*
reshape long gdp_current, i(countryname) j(year)
tempfile gdp_current
save `gdp_current'.dta, replace
restore

*** Report the share of 2014 GDP that belongs to countries outside our portfolio equity sample
preserve
keep if year == 2014 & !missing(port_eq_liabilities)
merge 1:1 countryname year using `gdp_current'.dta
*** Only matched rows (country in our sample AND in the GDP file) count toward the sample total
generate gdp_current_sample = gdp_current if _merge == 3
collapse (sum) gdp*, by(year)
format gdp* %16.0fc
generate pct_excluded = (gdp_current - gdp_current_sample)/gdp_current * 100
list year gdp_current gdp_current_sample pct_excluded
restore

*************************************
*** Developing-world totals by year
*************************************
*** A country-year without a stock must not add GDP to the denominator
assert gdp == . if port_eq_liabilities == .
collapse (sum) port_eq_liabilities gdp, by(year)

*************************************
*** Portfolio equity stock expressed as a percent of GDP
*************************************
generate port_eq_liabilities_pct = port_eq_liabilities/gdp * 100

*** Store the yearly totals and ratio for the combination step
tempfile portfolio_stock2012_2015
save `portfolio_stock2012_2015'.dta, replace

*************************************************************
*************************************************************
*** Combine portfolio equity stock for 1990-2015
*************************************************************
*************************************************************
*** Stack the two portfolio equity files (2012-2015 and 1990-2011) into one series
clear all 
append using `portfolio_stock2012_2015'.dta `portfolio_stock1990_2011'.dta

*** Keep the yearly ratio only; isid confirms the two files do not overlap in years
keep year port_eq_liabilities_pct 
isid year 
tempfile portfolio_stock1990_2015
save `portfolio_stock1990_2015'.dta, replace


*************************************
*** Get total foreign capital stock as a % of developing world GDP for 1990-2014
*************************************
*** The portfolio equity series is already in memory (it was saved from these very
*** data a few lines up), so only the FDI and external debt series are merged in
merge 1:1 year using `fdi_stock1990_2015'.dta, nogen 
merge 1:1 year using `external_debt1990_2014'.dta, nogen

*** Drop 2015 (because we don't have external debt data for 2015)
drop if year == 2015

*** Add up foreign capital stock as a % of developing world GDP
*** (rowtotal() treats a missing component as zero)
egen foreign_cap_pct = rowtotal(external_debt_pct fdi_stock_pct port_eq_liabilities_pct)

*** Save tempfile
tempfile foreign_cap1990_2014
save `foreign_cap1990_2014'.dta, replace

*************************************************************
*************************************************************
*** Combine foreign capital for 1870-2014
*************************************************************
*************************************************************
*** Stack the Maddison series and our calculated series.
*** generate() records where each row came from: 1 = first file listed (Maddison),
*** 2 = second file listed (our calculated 1990-2014 values).
clear all
append using `foreign_cap1870_1998'.dta `foreign_cap1990_2014'.dta, generate(append_source)

*** Check how close our numbers are for cases where we have data from both sources
sort year
duplicates tag year, gen(dup)
list year foreign_cap_pct append_source if dup
*** Keep the data from the most recent source (our calculated values): for a year
*** present in both files, the Maddison row is the one removed. The row is picked by
*** its source, not by the size of its value, so the choice does not change when
*** the underlying figures are revised.
drop if dup & append_source == 1 & !missing(year)
drop dup append_source

*************************************
*** Graph the results
*************************************

*** Labels shown in the legend of the line graph
label variable fdi_stock_pct "FDI Stock"
label variable port_eq_liabilities_pct "Portfolio Equity Stock"
label variable external_debt_pct "External Debt Stock"

*** Line graph: the three components as separate series.
*** The scatter layer is restricted to year==. ; its three series are plots 4-6
*** and are the ones the legend shows, as filled squares.
twoway ///
	(line external_debt_pct fdi_stock_pct port_eq_liabilities_pct year ///
		if inrange(year, 1990, 2015), ///
		lwidth(thick thick thick)) ///
	(scatter external_debt_pct fdi_stock_pct port_eq_liabilities_pct year ///
		if year==., ///
		msymbol(square square square) ///
		mcolor("6 78 129" "192 0 26" "202 133 0")), ///
	ytitle("Foreign Capital Stock (% Developing World GDP)", axis(1)) ///
	ytick(0(4)16, tlength(0)) ///
	ylabel(4(4)16, noticks grid axis(1)) ///
	yscale(noline titlegap(2) axis(1)) ///
	xtitle("Year") ///
	xlabel(1990(5)2015, angle(45)) ///
	legend(cols(3) order(4 5 6) symysize(1) symxsize(1) size(small)) ///
	title("Composition of Foreign Capital") ///
	title("In the Developing World, 1990-2014", suffix) ///
	plotregion(margin(zero))

*** Redraw at 10 x 6 and export
graph display, xsize(10) ysize(6)
graph export "../Graphs/Composition of Foreign Capital Stock (Line).png", as(png) replace 


*** Stacked bar graph: the three components on top of each other, one bar per year
graph bar external_debt_pct fdi_stock_pct port_eq_liabilities_pct ///
	if inrange(year, 1990, 2015), ///
	stack ///
	over(year, label(labsize(small) angle(45) labgap(1.8))) ///
	ytitle("Foreign Capital Stock (% Developing World GDP)", axis(1)) ///
	ytick(0(5)35, axis(1) tlength(0)) ///
	ylabel(5(5)35, noticks grid) ///
	yscale(noline titlegap(2) axis(1)) ///
	legend(cols(3) symysize(2) symxsize(2) size(small) ///
		label(1 "External Debt Stock") label(2 "FDI Stock") label(3 "Portfolio Equity Stock")) ///
	title("Composition of Foreign Capital") ///
	title("In the Developing World, 1990-2014", suffix) ///
	plotregion(margin(zero))

*** Redraw at 10 x 6 and export
graph display, xsize(10) ysize(6)	
graph export "../Graphs/Composition of Foreign Capital Stock (Bar).png", as(png) replace 

*************************************
*** Save the data
*************************************
*** Print the final variable list with full names, write the dataset to the current
*** working directory, and close the log
describe, fullnames
save "Foreign Capital Stock - Percent of Developing World GDP.dta", replace

log close

